############################################################################
# Replication Code for                                                     #
# "The Effects of Forced Versus Selective Exposure to Propaganda in China" #
# Political Science Research and Methods                                   #
# Author: Xiaoxiao SHEN                                                    #
# Date: July 6, 2025                                                       #
############################################################################



rm(list=ls())

# Install necessary packages if not already installed
if (!require("ggplot2")) install.packages("ggplot2")
if (!require("broom")) install.packages("broom")
if (!require("tidyverse")) install.packages("tidyverse")
if (!require("plyr")) install.packages("plyr")
if (!require("dplyr")) install.packages("dplyr")
if (!require("fuzzyjoin")) install.packages("fuzzyjoin")
if (!require("stringr")) install.packages("stringr")
if (!require("knitr")) install.packages("knitr")
if (!require("gridExtra")) install.packages("gridExtra")
if (!require("estimatr")) install.packages("estimatr")
if (!require("Matrix")) install.packages("Matrix")
if (!require("mediation")) install.packages("mediation")
if (!require("GGally")) install.packages("GGally")
if (!require("stargazer")) install.packages("stargazer")
if (!require("irr")) install.packages("irr")
if (!require("GK2011")) install.packages("GK2011")
if (!require("psych")) install.packages("psych")
if (!require("nFactors")) install.packages("nFactors")



### Step1: Load packages ###

library(ggplot2)
library(broom)
library(tidyverse)
library(plyr)
library(dplyr)
library(fuzzyjoin)
library(stringr)
library(knitr)
library(gridExtra)
library(estimatr)
library(Matrix)
library(mediation)
library(GGally)
library(stargazer)
library(irr)
library(GK2011)
library(psych)
library(nFactors)



# Set data directory --- Please replace "XXX" with the appropriate path
data_dir <- "/XXX/Replication_Xiaoxiao Shen_PSRM/Data"
output_dir <- "/XXX/Replication_Xiaoxiao Shen_PSRM/Output/Survey1"
if (!dir.exists(output_dir)) {
  dir.create(output_dir, recursive = TRUE)
}



### Step2: Load data ###
header <- read.csv(file.path(data_dir, "Survey1.csv"), header = FALSE, nrows = 1, as.is = TRUE, fileEncoding = "ISO-8859-1")
data <- read.csv(file.path(data_dir, "Survey1.csv"), header= FALSE , na=c("", "NA"), fileEncoding = "ISO-8859-1") # 1030 obs. of 124 variables 
colnames(data) = header
rm(header) 

data <- data%>%
        filter(data$"Finished"== 1 ) # 937 obs. of 124 variables 
data <- data%>%
        filter(data$"consent"== 1 ) # 928 obs. of 124 variables
data <- data%>%
        filter(data$"summary_check_RA1"!= 99 ) # 858 obs. of 124 variables
data <- data%>%
        filter(data$"summary_check_RA2"!= 99 ) # 858 obs. of 124 variables
# data <- data%>%
#         filter(data$"thought_check_RA1"!= 99 )
# data <- data%>%
#         filter(data$"thought_check_RA2"!= 99 )



### Step3: Data Cleaning ###

# 3.1 Assign treatment
data$treatment=0

treatment1=which(data$group=="1_forced_propaganda")
data$treatment[treatment1]=1 # force_polit

treatment2=which(data$group=="2_forced_nonpropaganda")
data$treatment[treatment2]=2 # force_science

treatment3=which(data$select==1)
data$treatment[treatment3]=3 # select_polit

treatment4=which(data$select==2)
data$treatment[treatment4]=4 # select_science

# drop=which(data$treatment==0)
# data=data[-drop,] # some samples do not have a clear type

data$treatment=factor(data$treatment,
                    labels=c("force_polit","force_science","select_polit","select_science")) # 858 obs. of 125 variables

table(data$treatment) # force_polit:203, force_science:214, select_polit:235, select_science:206

rm(treatment1);rm(treatment2);rm(treatment3);rm(treatment4)


# 3.2 Recode material (1 = read before)
data$material[data$material==2] <- 0 # 858 obs. of 125 variables
data$material <- as.numeric(data$material)
table(data$material) # 0:664, 1:194


# 3.3 Recode gender (1 = female)
data$gender <- ifelse(data$gender==2,1,0) # 858 obs. of 125 variables
data$gender <- as.numeric(data$gender)
table(data$gender) # 0:396, 1:462


# 3.4 Recode age (in years)
# (1) Delete obs under 18
# (2) 1 for obs 30 or younger, 0 for obs older than 30
data$age <- as.character(data$age)
class(data$age)
data$age <- 2020-as.numeric(data$age)
data$age[data$age>=100|data$age<=0] <- NA
data <- data%>%
        filter(data$"age">= 18 ) 
data$age <- ifelse(data$age <= 30,1,0) # 830 obs. of 125 variables
data$age <- as.numeric(data$age)
table(data$age) # 0: 144, 1: 686


# 3.5 Recode education
# 1 for college degree "7" or above, 0 for others
data$education[data$education==10] <- NA
data$education <- ifelse(data$education >= 7,1,0) # 830 obs. of 125 variables
data$education <- as.numeric(data$education)
table(data$education) # 0: 380， 1: 450


# 3.6 Recode urban hukou
# 0 for urban hukou, 0 for others
data$hukou[data$hukou==5] <- NA
data$urban <- ifelse(data$hukou==2|data$hukou==4,1,0) # 830 obs. of 125 variables
data$urban <- as.numeric(data$urban)
table(data$urban) # 0: 408, 1: 421


# 3.7 Recode CCP
# 1 for CCP member, 0 for others
data$party <- ifelse(data$party==1,1,0) # 830 obs. of 125 variables
data$party <- as.numeric(data$party)
table(data$party) # 0: 716, 1:114


# 3.8 Recode income
# 1 for high income, 0 for others
# Code 4,5,6 as high income
data$income <- ifelse(data$income==7|data$income==8|data$income==9,0,data$income)
data$income <- ifelse(data$income==4|data$income==5|data$income==6,1,0) # 830 obs. of 125 variables
data$income <- as.numeric(data$income) 
table(data$income) # 0:592, 1: 238


# 3.9 Recode knowledge
# Respondents' political knowledge is scored as follows: 
# 0 if both questions are answered incorrectly; 
# 0.5 if one is answered correctly; 
# and 1 if both are answered correctly.
data$political_knowledge1 <- ifelse(data$political_knowledge1==1,1,0)
table(data$political_knowledge1) # 0: 175, 1: 655
data$political_knowledge2 <- ifelse(data$political_knowledge2==4,1,0)
table(data$political_knowledge2) # 0: 426, 1: 404
data$knowledge <- (data$political_knowledge1+data$political_knowledge2)/2 # 830 obs. of 126 variables
data$knowledge <- as.numeric(data$knowledge) 
table(data$knowledge) # 0: 129， 0.5: 343， 1: 358


# 3.10 Recode marriage
# 0 = No (never married/divorced/other), 1 = Yes (married/widowed)
data$marriage <- ifelse(data$marriage==2|data$marriage==4, 1, 0) # 830 obs. of 126 variables
data$marriage <- as.numeric(data$marriage)
table(data$marriage) # 0:584, 1:246


# 3.11 Recode interst
data$interest <- data$political_interest
data$interest <- as.numeric(data$interest) # 830 obs. of 127 variables
table(data$interest)  # 1: 12, 2: 125, 3: 281, 4: 353, 5: 59


# 3.12 Recode news_consumption
data$news_consumption <- as.numeric(data$news_consumption) # 830 obs. of 127 variables
table(data$news_consumption) # 1:145, 2: 117, 3: 270, 4: 162, 5:136


# 3.13 Recode occupation 
# Government agency is coded as 1 
# (1: government department, 
# 2: state-owned public institution [science, education, culture, health], 
# 3: state-owned enterprise); 
# non-government agency is coded as 0.
data$occupation <- ifelse(data$occupation==1|data$occupation==2|data$occupation==3, 1, 0) # 830 obs. of 127 variables
data$occupation <- as.numeric(data$occupation)
table(data$occupation) # 0: 652, 1: 178


# 3.14 Recode major
# 1 for social science, 0 for non-social science
data$major <- ifelse(data$major==3, 1, 0) # 830 obs. of 127 variables
data$major <- as.numeric(data$major)
table(data$major) # 0: 691, 1:139


# 3.15 Recode reading_time
data$reading_time <- as.numeric(data$reading_time) # 830 obs. of 127 variables


# 3.16 Recode confidence_2
data$confidence_2 <- as.numeric(data$confidence_2) # 830 obs. of 127 variables
which(colnames(data) == "confidence_2")

data[,60]
# 1<->5
condition=data[,60]==1
data[condition,60]=6
condition=data[,60]==5
data[condition,60]=1
condition=data[,60]==6
data[condition,60]=5
# 2<->4
condition=data[,60]==2
data[condition,60]=6
condition=data[,60]==4
data[condition,60]=2
condition=data[,60]==6
data[condition,60]=4

rm(condition) 


# 3.17 Recode ethnicity
data$ethnicity <- ifelse(data$ethnicity==1, 1, 0) # 830 obs. of 127 variables
data$ethnicity <- as.numeric(data$ethnicity)
table(data$ethnicity) # 0: 47, 1: 783


# 3.18 summary_check_RA
# Use package irr to calculate intercoder reliability，method: Cohen's Kappa
table(data$summary_check_RA1)
table(data$summary_check_RA2)
data$summary_check_RA1 <- as.numeric(data$summary_check_RA1)
data$summary_check_RA2 <- as.numeric(data$summary_check_RA2)

kappa_result <- kappa2(data.frame(data$summary_check_RA1, data$summary_check_RA2))


# 3.19 Summary statistics
selected_vars <- c("gender", "age", "education", "urban", "party", "income", "marriage", "occupation", "major", "ethnicity")

one_proportions <- sapply(data[selected_vars], function(x) mean(x == 1, na.rm = TRUE))

summary_stat <- data.frame(
  variable = names(one_proportions),
  proportion_of_1 = round(one_proportions, 3),
  stringsAsFactors = FALSE
)

summary_stat <- rbind(
  summary_stat,
  data.frame(
    variable = "N",
    proportion_of_1 = nrow(data)
  )
)

write.csv(summary_stat, file = file.path(output_dir, "3_summary_stat.csv"), row.names = FALSE)



### Step4: Balance Plot ###

data$forced_prop <- data$group=="1_forced_propaganda" # 830 obs. of 128 variables
data$forced_nonprop <- data$group=="2_forced_nonpropaganda" # 830 obs. of 129 variables
data$select <- data$group=="3_select" 

forced_prop <- summary(lm(forced_prop~gender+age+urban+education+income+party+marriage+knowledge+interest+news_consumption+ethnicity+major+occupation,data=data))
forced_nonprop <- summary(lm(forced_nonprop~gender+age+urban+education+income+party+marriage+knowledge+interest+news_consumption+ethnicity+major+occupation,data=data))
select <- summary(lm(select~gender+age+urban+education+income+party+marriage+knowledge+interest+news_consumption+ethnicity+major+occupation,data=data))

forced_prop.1 <- as.data.frame(forced_prop$coefficients)
forced_prop.1$Group <- 'forced_prop'

forced_nonprop.1 <- as.data.frame(forced_nonprop$coefficients)
forced_nonprop.1$Group <- 'forced_nonprop'

select.1 <- as.data.frame(select$coefficients)
select.1$Group <- 'select'

all_4.rap = rbind.data.frame(forced_prop.1, forced_nonprop.1, select.1)
all_4.rap$Estimate = round(all_4.rap$Estimate, 3)
all_4.rap$`Std. Error` = round(all_4.rap$`Std. Error`, 3)
all_4.rap$`t value` = round(all_4.rap$`t value`, 3)
all_4.rap$`Pr(>|t|)` = round(all_4.rap$`Pr(>|t|)`, 3)
write.csv(all_4.rap, file = file.path(output_dir, "4_balance_check.csv"), row.names = TRUE)



### Step5: Manipulation check ###

data$class=0 # 830 obs. of 130 variables

class1=which(data$group=="1_forced_propaganda")
data$class[class1]=1 # forced

class1=which(data$group=="2_forced_nonpropaganda")
data$class[class1]=1 # forced

class2=which(data$group=="3_select")
data$class[class2]=2 # select

data$class=factor(data$class,
                labels=c("forced","select"))

table(data$class) # forced: 401, select: 429

### First, divide the observation into forced and selective, and then look at the proportion of "1" in the two RAs.

# # Calculate the proportion of 0 and 1 in summary_check_RA1 in the group with class "forced"
table_RA1_forced <- table(data$class == "forced", data$summary_check_RA1)
prop_RA1_forced <- prop.table(table_RA1_forced, 1)

# Calculate the proportion of 0 and 1 in summary_check_RA2 in the group with class "forced"
table_RA2_forced <- table(data$class == "forced", data$summary_check_RA2)
prop_RA2_forced <- prop.table(table_RA2_forced, 1)

# Results
prop_RA1_forced
prop_RA2_forced

# See whether there is a significant difference in the proportions of 0 and 1 in
# summary_check_RA1 and summary_check_RA2 among the people whose classes are forced and selected.

# Generate a contingency table of class and summary_check_RA1
table_RA1 <- table(data$class, data$summary_check_RA1)
table_RA2 <- table(data$class, data$summary_check_RA2)

# Compare whether there is a significant difference in the proportions of 0 and 1 in the forced and select groups
sink(file.path(output_dir, "5_accuracy_of_article_summaries.txt"))
print("The proportion of 0 and 1 in summary_check_RA1 in the group with class `forced`")
prop_RA1_forced
print("The proportion of 0 and 1 in summary_check_RA2 in the group with class `forced`")
prop_RA2_forced
chisq.test(table_RA1)
chisq.test(table_RA2)
sink()


# effort
data$effort = as.numeric(data$effort)
data$class = as.factor(data$class)

summary(aov(data$effort~class,data=data))
TukeyHSD(aov(data$effort~class,data=data))

effort.1=TukeyHSD(aov(data$effort~class,data=data))
effort.1=as.data.frame(effort.1$class)


# trivialization 
data$trivialization = as.numeric(data$trivialization)
data$class = as.factor(data$class)

summary(aov(data$trivialization~class,data=data)) 
TukeyHSD(aov(data$trivialization~class,data=data))

trivialization.1=TukeyHSD(aov(data$trivialization~class,data=data))
trivialization.1=as.data.frame(trivialization.1$class)

manipulation_check = rbind.data.frame(effort.1, trivialization.1)
manipulation_check$SE = (manipulation_check$upr - manipulation_check$lwr)/1.96/2
manipulation_check = round(manipulation_check, 3)
manipulation_check$Variable = c("Effort", "Trivialization")

write.csv(manipulation_check, file = file.path(output_dir, "5_manipulation_check.csv"), row.names = TRUE)



### Step6: Marginal effect ###

# 6.1 reverse function
reverse=function(x,old,new,store=-1){ 
        # the function reverses the original score to a new one
        # "store" is a temp store which keeps the dt, make sure "can" is not equal to any one of "old" and "new"
        # make sure old and new are the same length
        # "x" is a vector
        numb=length(old)
        for(i in 1:numb){
                x[which(x==old[i])] = store
                x[which(x==new[i])] = old[i]
                x[which(x==store)] = new[i]
        }
        return(x) # return a vector
}


# 6.2 rescale covariates
range_0_1=function(x,range=NULL){ 
        # if do not give range, make sure both best and worst answer has sample
        if(is.null(range)){
                min=min(x,na.rm = TRUE)
                max=max(x,na.rm = TRUE)
        }else{
                min=min(range)
                max=max(range)
        }
        if(min>0){
                print(min)
                warning("minimum value larger than 0, make sure it is correct")
        }
        result=(x-min)/(max-min)
        return(result)
}


# 6.3 select_polit=1, select_science=0
data$select_polit=NA # 830 obs. of 131 variables
data$select_polit[which(data$treatment=="select_polit")]=1
data$select_polit[which(data$treatment=="select_science")]=0 # no not need to choose extra variable, just use "select" is enough
table(data$select_polit) # 0: 198, 1: 231


# 6.4 prepare: political_ideology / political_interest / political_knowledge / news_consumption

# 6.4.1 political_ideology
std=function(x){
        return((x-mean(x))/sd(x))
}

data$political_ideology_1 = as.numeric(data$political_ideology_1)
data$political_ideology_2 = as.numeric(data$political_ideology_2)
data$political_ideology_3 = as.numeric(data$political_ideology_3)

data$ideology=data$political_ideology_1+data$political_ideology_2+data$political_ideology_3 # 830 obs. of 132 variables


# 6.4.2 political_interest
data$interest = as.numeric(data$interest)
table(data$interest) # 1:12, 2:125, 3:281, 4:353, 5:59

data$interest=range_0_1(data$interest)
table(data$interest) # 0:12, 0.25:125, 0.5:281, 0.75:353, 1:59 


# 6.4.3 political_knowledge
data$knowledge = as.numeric(data$knowledge)
table(data$knowledge) # 0:129, 0.5:343, 1:358

data$knowledge=range_0_1(data$knowledge)
table(data$knowledge) # 0:129, 0.5:343, 1:358


# 6.4.4 news_consumption
data$news_consumption <- as.numeric(data$news_consumption)
table(data$news_consumption) # 1:145, 2: 117, 3: 270, 4: 162, 5:136

data$news_consumption=range_0_1(data$news_consumption)
table(data$news_consumption) # 0:145, 0.25:117, 0.5:270, 0.75:162, 1:136


# 6.5 logistic regression

# 6.5.1 binomial links to logit regression
marginal.reg=glm(select_polit~gender+age+urban+education+income+party+marriage+knowledge+interest+news_consumption+ethnicity+major+occupation,data=data,family="binomial")


# 6.5.2 summary 
marginal_reg_summary = as.data.frame(summary(marginal.reg)$coefficients)
marginal_reg_summary = round(marginal_reg_summary, 3)
write.csv(marginal_reg_summary, file = file.path(output_dir, "8_marginal_effects.csv"), row.names = TRUE)



### Step7: Main effects ###

# 7.1 rand
data$rand=0 # 830 obs. of 134 variables

rand1=which(data$group=="1_forced_propaganda")
data$rand[rand1]=1 # random

rand1=which(data$group=="2_forced_nonpropaganda")
data$rand[rand1]=1 # random

rand2=which(data$group=="3_select")
data$rand[rand2]=0 # select

table(data$rand) # 0: 429, 1: 401


# 7.2 tr
data$tr=0 # 830 obs. of 135 variables

tr1=which(data$treatment=="force_polit")
data$tr[tr1]=1 # treated

tr1=which(data$treatment=="select_polit")
data$tr[tr1]=1 # treated

tr2=which(data$treatment=="force_science")
data$tr[tr2]=0 # control

tr2=which(data$treatment=="select_science")
data$tr[tr2]=0 # control

table(data$tr) # 0: 404, 1: 426


# 7.3 gk.estimate
data$central_trust = as.numeric(data$central_trust)
data$local_trust = as.numeric(data$local_trust)
data$central_satisfaction = as.numeric(data$central_satisfaction)
data$local_satisfaction = as.numeric(data$local_satisfaction)
data$evaluation_now = as.numeric(data$evaluation_now)
data$evaluation_future = as.numeric(data$evaluation_future)
data$confidence_1 = as.numeric(data$confidence_1)
data$confidence_2 = as.numeric(data$confidence_2)
data$efficacy_1 = as.numeric(data$efficacy_1)
data$efficacy_2 = as.numeric(data$efficacy_2)
data$pride_1 = as.numeric(data$pride_1)
data$pride_2 = as.numeric(data$pride_2)

set.seed(12345)
gk.estimate=list()
gk.estimate[[1]]=estimate(rand = data$rand, tr = data$tr, y = data$central_trust)
gk.estimate[[2]]=estimate(rand = data$rand, tr = data$tr, y = data$local_trust)
gk.estimate[[3]]=estimate(rand = data$rand, tr = data$tr, y = data$central_satisfaction)
gk.estimate[[4]]=estimate(rand = data$rand, tr = data$tr, y = data$local_satisfaction)
gk.estimate[[5]]=estimate(rand = data$rand, tr = data$tr, y = data$evaluation_now)
gk.estimate[[6]]=estimate(rand = data$rand, tr = data$tr, y = data$evaluation_future)
gk.estimate[[7]]=estimate(rand = data$rand, tr = data$tr, y = data$confidence_1)
gk.estimate[[8]]=estimate(rand = data$rand, tr = data$tr, y = data$confidence_2)
gk.estimate[[9]]=estimate(rand = data$rand, tr = data$tr, y = data$efficacy_1)
gk.estimate[[10]]=estimate(rand = data$rand, tr = data$tr, y = data$efficacy_2)
gk.estimate[[11]]=estimate(rand = data$rand, tr = data$tr, y = data$pride_1)
gk.estimate[[12]]=estimate(rand = data$rand, tr = data$tr, y = data$pride_2)

list.name=c("central_trust","local_trust",
            "central_satisfaction","local_satisfaction",
            "evaluation_now","evaluation_future",
            "confidence_1","confidence_2",
            "efficacy_1","efficacy_2",
            "pride_1","pride_2")

names(gk.estimate)=list.name 

gk.estimate_central_trust = as.data.frame(gk.estimate$central_trust)
gk.estimate_local_trust = as.data.frame(gk.estimate$local_trust)
gk.estimate_central_satisfaction = as.data.frame(gk.estimate$central_satisfaction)
gk.estimate_local_satisfaction = as.data.frame(gk.estimate$local_satisfaction)
gk.estimate_evaluation_now = as.data.frame(gk.estimate$evaluation_now)
gk.estimate_evaluation_future = as.data.frame(gk.estimate$evaluation_future)

gk.estimate_confidence_1 = as.data.frame(gk.estimate$confidence_1)
gk.estimate_confidence_2 = as.data.frame(gk.estimate$confidence_2)
gk.estimate_efficacy_1 = as.data.frame(gk.estimate$efficacy_1)
gk.estimate_efficacy_2 = as.data.frame(gk.estimate$efficacy_2)
gk.estimate_pride_1 = as.data.frame(gk.estimate$pride_1)
gk.estimate_pride_2 = as.data.frame(gk.estimate$pride_2)

gk.estimate_central_trust['name'] = rep("central_trust", times=4)
gk.estimate_local_trust['name'] = rep("local_trust", times=4)
gk.estimate_central_satisfaction['name'] = rep("central_satisfaction", times=4)
gk.estimate_local_satisfaction['name'] = rep("local_satisfaction", times=4)
gk.estimate_evaluation_now['name'] = rep("evaluation_now", times=4)
gk.estimate_evaluation_future['name'] = rep("evaluation_future", times=4)

gk.estimate_confidence_1['name'] = rep("confidence_1", times=4)
gk.estimate_confidence_2['name'] = rep("confidence_2", times=4)
gk.estimate_efficacy_1['name'] = rep("efficacy_1", times=4)
gk.estimate_efficacy_2['name'] = rep("efficacy_2", times=4)
gk.estimate_pride_1['name'] = rep("pride_1", times=4)
gk.estimate_pride_2['name'] = rep("pride_2", times=4)

gk.estimate.rap=rbind.data.frame(gk.estimate_central_trust, gk.estimate_local_trust, 
                                 gk.estimate_central_satisfaction, gk.estimate_local_satisfaction, 
                                 gk.estimate_evaluation_now, gk.estimate_evaluation_future,
                                 gk.estimate_confidence_1, gk.estimate_confidence_2, 
                                 gk.estimate_efficacy_1, gk.estimate_efficacy_2,
                                 gk.estimate_pride_1, gk.estimate_pride_2)
# # 7.3 gk.estimate.rap
# gk.estimate.rap$Estimate = round(gk.estimate.rap$Estimate, 3)
# gk.estimate.rap$SE = round(gk.estimate.rap$SE, 3)
# gk.estimate.rap$t = round(gk.estimate.rap$t, 3)
# gk.estimate.rap$p = round(gk.estimate.rap$p, 3)

# 7.4 get.star
get.star=function(p){
        p_=abs(p)
        if(p_>1 | p_<0){stop("p_ value do not in (0,1) \n")}
        
        if(p_>=0.1){return("")}
        else if(p_>0.05){return("*")}
        else if(p_>0.01){return("**")}
        else{return("***")}
}


# 7.5 gk.summary
gk.summary=function(gk,name="variable",digit=3){
        dt=as.data.frame(matrix(nrow=2,ncol=4))
        names(dt)=c("name","ATE","selector","non_selector")
        
        for(i in 1:3){
                coef.value=round(gk$Estimate[i],digit)
                p_=gk$p[i]
                star=get.star(p_)
                dt[1,i+1]=paste(coef.value,star,sep="")
                se=paste("(",round(gk$SE[i],digit),")",sep="")
                dt[2,i+1]=se
        }
        
        dt[1,1]=name
        return(dt)
}


# 7.6 gk.gather
gk.gather=function(gk.list,digit=3,NA.insert=FALSE){
        n=length(gk.list) # if you have only one estimate, use gk.summary 
        list.name=names(gk.list)
        result=vector()
        for(i in 1:n){
                gk.table=gk.summary(gk.list[[i]],name=list.name[i],digit=digit)
                if(NA.insert){result=rbind.data.frame(result,NA,gk.table)}
                else{result=rbind.data.frame(result,gk.table)}
        }
        return(result)
}

gk1=gk.gather(gk.estimate,digit=3,NA.insert=FALSE)
gk1 

write.csv(gk1, file = file.path(output_dir, "7_treatment_effects.csv"), row.names = TRUE)



### Step8: political_ideology / political_interest / knowledge / news_consumption ###

# 8.1 political_ideology

# 8.1 high.ideology & low.ideology
data$ideology = as.numeric(data$ideology)
table(data$ideology)

data$high_ideology=ifelse(std(data$ideology)>=0,1,0)
table(data$high_ideology) # 0: 397, 1: 433

high.ideology <- data[data$high_ideology == 1,]
low.ideology <- data[data$high_ideology == 0,]


# 8.2 political_interest

# 8.2 high.interest & low.interest
data$interest = as.numeric(data$interest)
table(data$interest) # 0:12, 0.25:125, 0.5:281, 0.75:353, 1:59

data$high_interest=ifelse(std(data$interest)>=0,1,0)
table(data$high_interest) # 0:418, 1:412

high.interest <- data[data$high_interest == 1,]
low.interest <- data[data$high_interest == 0,]


# 8.3 knowledge

# 8.3 high.knowledge & low.knowledge
data$knowledge = as.numeric(data$knowledge)
table(data$knowledge) # 0:129, 0.5:343, 1:358

data$high_knowledge=ifelse(std(data$knowledge)>=0,1,0)
table(data$high_knowledge) # 0:472, 1:358

high.knowledge <- data[data$high_knowledge == 1,]
low.knowledge <- data[data$high_knowledge == 0,]


# 8.4 news_consumption

# 8.4 high.news_consumption & low.news_consumption
data$news_consumption = as.numeric(data$news_consumption)
table(data$news_consumption) # 0:145, 0.25:117,  0.5:270, 0.75:162, 1:136

data$high_news_consumption=ifelse(std(data$news_consumption)>=0,1,0)
table(data$high_news_consumption) # 0:532, 1:298

high.news_consumption <- data[data$high_news_consumption == 1,]
low.news_consumption <- data[data$high_news_consumption == 0,]


# 8.5 Data for combining
data_for_combining <- data %>%
        dplyr::select(age, education, ethnicity, gender, urban, income, major,
                      marriage, occupation, party,
                      ideology, interest, news_consumption, knowledge,
                      treatment, class, effort, trivialization,
                      central_trust, central_satisfaction, local_trust, local_satisfaction,
                      evaluation_now, evaluation_future, confidence_1, confidence_2,
                      efficacy_1, efficacy_2, pride_1, pride_2, rand, tr,
                      emotion_1, emotion_2, emotion_3, emotion_4, emotion_5, emotion_6, group,
                      summary_check_RA1, summary_check_RA2)

write.csv(data_for_combining, file = file.path(output_dir, "8_data_for_combining.csv"), row.names = TRUE)



### Step9: PCA Analysis ###

# 9.1 create new dataframe
dt <- as.data.frame(matrix(nrow=dim(data)[1]*2,ncol=0)) 
dt$central <- append(data$central_trust, data$central_satisfaction)
dt$local <- append(data$local_trust, data$local_satisfaction)
dt$evaluation <- append(data$evaluation_now, data$evaluation_future)
dt$confidence <- append(data$confidence_1, data$confidence_2)
dt$efficacy <- append(data$efficacy_1, data$efficacy_2)
dt$pride <- append(data$pride_1, data$pride_2)
dt$rand <- append(data$rand, data$rand)
dt$tr <- append(data$tr, data$tr)
dt$treatment <- append(data$treatment, data$treatment)
dt$emotion_1 <- append(data$emotion_1, data$emotion_1)
dt$emotion_2 <- append(data$emotion_2, data$emotion_2)
dt$emotion_3 <- append(data$emotion_3, data$emotion_3)
dt$emotion_4 <- append(data$emotion_4, data$emotion_4)
dt$emotion_5 <- append(data$emotion_5, data$emotion_5)
dt$emotion_6 <- append(data$emotion_6, data$emotion_6)
dt$effort <- append(data$effort, data$effort)
dt$trivialization <- append(data$trivialization, data$trivialization)


# 9.2 data pca analysis
result_dt=principal(dt[,c(1,2,4,5)], nfactors = 1, residuals = FALSE,rotate="varimax")
result_dt
dt$pca_score=result_dt$score[,1]
dt$pca_score

ev <- eigen(cor(dt[,c(1,2,4,5)])) 
nS <- nScree(x=ev$values)
plotnScree(nS)


# Proportion of Variance Explained
sink(file.path(output_dir, "9.0.1 PCA_Proportion of Variance.txt"))
pca_proportion <- princomp(cor(dt[, c(1, 2, 4, 5)]))
summary(pca_proportion)
sink()


# Loadings
sink(file.path(output_dir, "9.0.2 PCA_Loadings.txt"))
pca_proportion <- princomp(cor(dt[,c(1,2,4,5)]))
summary(pca_proportion)[2]
sink()


# Principal Component Analysis on Pro-regime Attitudes
gk_9.estimate=list()
gk_9.estimate[[1]] = estimate(rand = dt$rand, tr = dt$tr, y = dt$pca_score)
list_.name=c("PCA Score")
names(gk_9.estimate)=list_.name
gk_9.estimate_pca_score = as.data.frame(gk_9.estimate$`PCA Score`)
gk_9.estimate_pca_score['name'] = rep("PCA Score", times=4)
gk_9.estimate_pca_score
write.csv(gk_9.estimate_pca_score, file = file.path(output_dir, "9_pca_on_pro_regime_attitudes.csv"), row.names = TRUE)



### Step10: Potential Mechanisms ###

# 10.1 rand
data$rand=0

rand1=which(data$group=="1_forced_propaganda")
data$rand[rand1]=1 # random

rand1=which(data$group=="2_forced_nonpropaganda")
data$rand[rand1]=1 # random

rand2=which(data$group=="3_select")
data$rand[rand2]=0 # select

table(data$rand) # 0: 429, 1: 401


# 10.2 tr
data$tr=0

tr1=which(data$treatment=="force_polit")
data$tr[tr1]=1 # treated

tr1=which(data$treatment=="select_polit")
data$tr[tr1]=1 # treated

tr2=which(data$treatment=="force_science")
data$tr[tr2]=0 # control

tr2=which(data$treatment=="select_science")
data$tr[tr2]=0 # control

table(data$tr) # 0: 404, 1: 426


# 10.3 gk.estimate
set.seed(12345)

data$effort = as.numeric(data$effort)
data$trivialization = as.numeric(data$trivialization)
data$`emotion_1` = as.numeric(data$`emotion_1`)
data$`emotion_2` = as.numeric(data$`emotion_2`)
data$`emotion_3` = as.numeric(data$`emotion_3`)
data$`emotion_4` = as.numeric(data$`emotion_4`)
data$`emotion_5` = as.numeric(data$`emotion_5`)
data$`emotion_6` = as.numeric(data$`emotion_6`)

gk.estimate=list()
gk.estimate[[1]]=estimate(rand = data$rand, tr = data$tr, y = data$effort)
gk.estimate[[2]]=estimate(rand = data$rand, tr = data$tr, y = data$trivialization)
gk.estimate[[3]]=estimate(rand = data$rand, tr = data$tr, y = data$`emotion_1`)
gk.estimate[[4]]=estimate(rand = data$rand, tr = data$tr, y = data$`emotion_2`)
gk.estimate[[5]]=estimate(rand = data$rand, tr = data$tr, y = data$`emotion_3`)
gk.estimate[[6]]=estimate(rand = data$rand, tr = data$tr, y = data$`emotion_4`)
gk.estimate[[7]]=estimate(rand = data$rand, tr = data$tr, y = data$`emotion_5`)
gk.estimate[[8]]=estimate(rand = data$rand, tr = data$tr, y = data$`emotion_6`)

list.name=c("Effort","Trivialization",
            "Anxious","Proud",
            "Angry","Hope",
            "Worry","Excited")

names(gk.estimate)=list.name # remember to give every estimate a name

gk.estimate_Effort = as.data.frame(gk.estimate$Effort)
gk.estimate_Trivialization = as.data.frame(gk.estimate$Trivialization)
gk.estimate_Anxious = as.data.frame(gk.estimate$Anxious)
gk.estimate_Proud = as.data.frame(gk.estimate$Proud)
gk.estimate_Angry = as.data.frame(gk.estimate$Angry)
gk.estimate_Hope = as.data.frame(gk.estimate$Hope)
gk.estimate_Worry = as.data.frame(gk.estimate$Worry)
gk.estimate_Excited = as.data.frame(gk.estimate$Excited)

gk.estimate_Effort['name'] = rep("Effort", times=4)
gk.estimate_Trivialization['name'] = rep("Trivialization", times=4)
gk.estimate_Anxious['name'] = rep("Anxious", times=4)
gk.estimate_Proud['name'] = rep("Proud", times=4)
gk.estimate_Angry['name'] = rep("Angry", times=4)
gk.estimate_Hope['name'] = rep("Hope", times=4)
gk.estimate_Worry['name'] = rep("Worry", times=4)
gk.estimate_Excited['name'] = rep("Excited", times=4)

gk.estimate.rap=rbind.data.frame(gk.estimate_Effort, gk.estimate_Trivialization,
                                 gk.estimate_Anxious, gk.estimate_Proud,
                                 gk.estimate_Angry, gk.estimate_Hope,
                                 gk.estimate_Worry, gk.estimate_Excited)

gk.estimate.rap$Estimate = round(gk.estimate.rap$Estimate, 3)
gk.estimate.rap$SE = round(gk.estimate.rap$SE, 3)
gk.estimate.rap$t = round(gk.estimate.rap$t, 3)
gk.estimate.rap$p = round(gk.estimate.rap$p, 3)

# 10.3 gk.estimate.rap
write.csv(gk.estimate.rap, file = file.path(output_dir, "10_potential_mechanisms.csv"), row.names = TRUE)



### 11 High/Low -> ATE/selector/non-selector (Y is PCA) ###

# 11.1 political ideology

# 11.1.1 high.ideology
dt_high.ideology <- as.data.frame(matrix(nrow=nrow(high.ideology)*2,ncol=0)) 
dt_high.ideology$central <- append(high.ideology$central_trust, high.ideology$central_satisfaction)
dt_high.ideology$local <- append(high.ideology$local_trust, high.ideology$local_satisfaction)
dt_high.ideology$evaluation <- append(high.ideology$evaluation_now, high.ideology$evaluation_future)
dt_high.ideology$confidence <- append(high.ideology$confidence_1, high.ideology$confidence_2)
dt_high.ideology$efficacy <- append(high.ideology$efficacy_1, high.ideology$efficacy_2)
dt_high.ideology$pride <- append(high.ideology$pride_1, high.ideology$pride_2)
dt_high.ideology$rand <- append(high.ideology$rand, high.ideology$rand)
dt_high.ideology$tr <- append(high.ideology$tr, high.ideology$tr)

result_dt_high.ideology=principal(dt_high.ideology[,c(1,2,4,5)], nfactors = 1, residuals = FALSE,rotate="varimax")
result_dt_high.ideology
dt_high.ideology$pca_score=result_dt_high.ideology$score[,1]
dt_high.ideology$pca_score

gk_1.1.1.estimate=list()
gk_1.1.1.estimate[[1]] = estimate(rand = dt_high.ideology$rand, tr = dt_high.ideology$tr, y = dt_high.ideology$pca_score)

list_.name=c("PCA Score")
names(gk_1.1.1.estimate)=list_.name
gk_11.1.1.estimate_pca_score = as.data.frame(gk_1.1.1.estimate$`PCA Score`)
gk_11.1.1.estimate_pca_score['name'] = rep("PCA Score", times=4)
gk_11.1.1.estimate_pca_score$Estimate = round(gk_11.1.1.estimate_pca_score$Estimate, 3)
gk_11.1.1.estimate_pca_score$SE = round(gk_11.1.1.estimate_pca_score$SE, 3)
gk_11.1.1.estimate_pca_score$t = round(gk_11.1.1.estimate_pca_score$t, 3)
gk_11.1.1.estimate_pca_score$p = round(gk_11.1.1.estimate_pca_score$p, 3)
write.csv(gk_11.1.1.estimate_pca_score, file = file.path(output_dir, "11_1_pca_on_pro_regime_attitudes_high_ideology.csv"), row.names = TRUE)



# 11.1.2 low.ideology
dt_low.ideology <- as.data.frame(matrix(nrow=nrow(low.ideology)*2,ncol=0)) 
dt_low.ideology$central <- append(low.ideology$central_trust, low.ideology$central_satisfaction)
dt_low.ideology$local <- append(low.ideology$local_trust, low.ideology$local_satisfaction)
dt_low.ideology$evaluation <- append(low.ideology$evaluation_now, low.ideology$evaluation_future)
dt_low.ideology$confidence <- append(low.ideology$confidence_1, low.ideology$confidence_2)
dt_low.ideology$efficacy <- append(low.ideology$efficacy_1, low.ideology$efficacy_2)
dt_low.ideology$pride <- append(low.ideology$pride_1, low.ideology$pride_2)
dt_low.ideology$rand <- append(low.ideology$rand, low.ideology$rand)
dt_low.ideology$tr <- append(low.ideology$tr, low.ideology$tr)

result_dt_low.ideology=principal(dt_low.ideology[,c(1,2,4,5)], nfactors = 1, residuals = FALSE,rotate="varimax")
result_dt_low.ideology
dt_low.ideology$pca_score=result_dt_low.ideology$score[,1]
dt_low.ideology$pca_score

gk_11.1.2.estimate=list()
gk_11.1.2.estimate[[1]] = estimate(rand = dt_low.ideology$rand, tr = dt_low.ideology$tr, y = dt_low.ideology$pca_score)

list_.name=c("PCA Score")
names(gk_11.1.2.estimate)=list_.name
gk_11.1.2.estimate_pca_score = as.data.frame(gk_11.1.2.estimate$`PCA Score`)
gk_11.1.2.estimate_pca_score['name'] = rep("PCA Score", times=4)
gk_11.1.2.estimate_pca_score$Estimate = round(gk_11.1.2.estimate_pca_score$Estimate, 3)
gk_11.1.2.estimate_pca_score$SE = round(gk_11.1.2.estimate_pca_score$SE, 3)
gk_11.1.2.estimate_pca_score$t = round(gk_11.1.2.estimate_pca_score$t, 3)
gk_11.1.2.estimate_pca_score$p = round(gk_11.1.2.estimate_pca_score$p, 3)
write.csv(gk_11.1.2.estimate_pca_score, file = file.path(output_dir, "11_1_pca_on_pro_regime_attitudes_low_ideology.csv"), row.names = TRUE)



# 11.2 political interest

# 11.2.1 high.interest
dt_high.interest <- as.data.frame(matrix(nrow=nrow(high.interest)*2,ncol=0))
dt_high.interest$central <- append(high.interest$central_trust, high.interest$central_satisfaction)
dt_high.interest$local <- append(high.interest$local_trust, high.interest$local_satisfaction)
dt_high.interest$evaluation <- append(high.interest$evaluation_now, high.interest$evaluation_future)
dt_high.interest$confidence <- append(high.interest$confidence_1, high.interest$confidence_2)
dt_high.interest$efficacy <- append(high.interest$efficacy_1, high.interest$efficacy_2)
dt_high.interest$pride <- append(high.interest$pride_1, high.interest$pride_2)
dt_high.interest$rand <- append(high.interest$rand, high.interest$rand)
dt_high.interest$tr <- append(high.interest$tr, high.interest$tr)

result_dt_high.interest=principal(dt_high.interest[,c(1,2,4,5)], nfactors = 1, residuals = FALSE,rotate="varimax")
result_dt_high.interest
dt_high.interest$pca_score=result_dt_high.interest$score[,1]
dt_high.interest$pca_score

gk_11.2.1.estimate=list()
gk_11.2.1.estimate[[1]] = estimate(rand = dt_high.interest$rand, tr = dt_high.interest$tr, y = dt_high.interest$pca_score)

list_.name=c("PCA Score")
names(gk_11.2.1.estimate)=list_.name
gk_11.2.1.estimate_pca_score = as.data.frame(gk_11.2.1.estimate$`PCA Score`)
gk_11.2.1.estimate_pca_score['name'] = rep("PCA Score", times=4)

gk_11.2.1.estimate_pca_score$Estimate = round(gk_11.2.1.estimate_pca_score$Estimate, 3)
gk_11.2.1.estimate_pca_score$SE = round(gk_11.2.1.estimate_pca_score$SE, 3)
gk_11.2.1.estimate_pca_score$t = round(gk_11.2.1.estimate_pca_score$t, 3)
gk_11.2.1.estimate_pca_score$p = round(gk_11.2.1.estimate_pca_score$p, 3)

write.csv(gk_11.2.1.estimate_pca_score, file = file.path(output_dir, "11_2_pca_on_pro_regime_attitudes_high_interest.csv"), row.names = TRUE)



# 40.2.2 low.interest
dt_low.interest <- as.data.frame(matrix(nrow=nrow(low.interest)*2,ncol=0))
dt_low.interest$central <- append(low.interest$central_trust, low.interest$central_satisfaction)
dt_low.interest$local <- append(low.interest$local_trust, low.interest$local_satisfaction)
dt_low.interest$evaluation <- append(low.interest$evaluation_now, low.interest$evaluation_future)
dt_low.interest$confidence <- append(low.interest$confidence_1, low.interest$confidence_2)
dt_low.interest$efficacy <- append(low.interest$efficacy_1, low.interest$efficacy_2)
dt_low.interest$pride <- append(low.interest$pride_1, low.interest$pride_2)
dt_low.interest$rand <- append(low.interest$rand, low.interest$rand)
dt_low.interest$tr <- append(low.interest$tr, low.interest$tr)

result_dt_low.interest=principal(dt_low.interest[,c(1,2,4,5)], nfactors = 1, residuals = FALSE,rotate="varimax")
result_dt_low.interest
dt_low.interest$pca_score=result_dt_low.interest$score[,1]
dt_low.interest$pca_score

gk_11.2.2.estimate=list()
gk_11.2.2.estimate[[1]] = estimate(rand = dt_low.interest$rand, tr = dt_low.interest$tr, y = dt_low.interest$pca_score)

list_.name=c("PCA Score")

names(gk_11.2.2.estimate)=list_.name

gk_11.2.2.estimate_pca_score = as.data.frame(gk_11.2.2.estimate$`PCA Score`)

gk_11.2.2.estimate_pca_score['name'] = rep("PCA Score", times=4)

gk_11.2.2.estimate_pca_score$Estimate = round(gk_11.2.2.estimate_pca_score$Estimate, 3)
gk_11.2.2.estimate_pca_score$SE = round(gk_11.2.2.estimate_pca_score$SE, 3)
gk_11.2.2.estimate_pca_score$t = round(gk_11.2.2.estimate_pca_score$t, 3)
gk_11.2.2.estimate_pca_score$p = round(gk_11.2.2.estimate_pca_score$p, 3)

write.csv(gk_11.2.2.estimate_pca_score, file = file.path(output_dir, "11_2_pca_on_pro_regime_attitudes_low_interest.csv"), row.names = TRUE)



# 11.3 political news consumption

# 11.3.1 high.news_consumption
dt_high.news_consumption <- as.data.frame(matrix(nrow=nrow(high.news_consumption)*2,ncol=0)) 
dt_high.news_consumption$central <- append(high.news_consumption$central_trust, high.news_consumption$central_satisfaction)
dt_high.news_consumption$local <- append(high.news_consumption$local_trust, high.news_consumption$local_satisfaction)
dt_high.news_consumption$evaluation <- append(high.news_consumption$evaluation_now, high.news_consumption$evaluation_future)
dt_high.news_consumption$confidence <- append(high.news_consumption$confidence_1, high.news_consumption$confidence_2)
dt_high.news_consumption$efficacy <- append(high.news_consumption$efficacy_1, high.news_consumption$efficacy_2)
dt_high.news_consumption$pride <- append(high.news_consumption$pride_1, high.news_consumption$pride_2)
dt_high.news_consumption$rand <- append(high.news_consumption$rand, high.news_consumption$rand)
dt_high.news_consumption$tr <- append(high.news_consumption$tr, high.news_consumption$tr)

result_dt_high.news_consumption=principal(dt_high.news_consumption[,c(1,2,4,5)], nfactors = 1, residuals = FALSE,rotate="varimax")
result_dt_high.news_consumption
dt_high.news_consumption$pca_score=result_dt_high.news_consumption$score[,1]
dt_high.news_consumption$pca_score

gk_11.3.1.estimate=list()
gk_11.3.1.estimate[[1]] = estimate(rand = dt_high.news_consumption$rand, tr = dt_high.news_consumption$tr, y = dt_high.news_consumption$pca_score)

list_.name=c("PCA Score")
names(gk_11.3.1.estimate)=list_.name
gk_11.3.1.estimate_pca_score = as.data.frame(gk_11.3.1.estimate$`PCA Score`)
gk_11.3.1.estimate_pca_score['name'] = rep("PCA Score", times=4)

gk_11.3.1.estimate_pca_score$Estimate = round(gk_11.3.1.estimate_pca_score$Estimate, 3)
gk_11.3.1.estimate_pca_score$SE = round(gk_11.3.1.estimate_pca_score$SE, 3)
gk_11.3.1.estimate_pca_score$t = round(gk_11.3.1.estimate_pca_score$t, 3)
gk_11.3.1.estimate_pca_score$p = round(gk_11.3.1.estimate_pca_score$p, 3)
write.csv(gk_11.3.1.estimate_pca_score, file = file.path(output_dir, "11_3_pca_on_pro_regime_attitudes_high_news_consumption.csv"), row.names = TRUE)



# 11.3.2 low.news_consumption
dt_low.news_consumption <- as.data.frame(matrix(nrow=nrow(low.news_consumption)*2,ncol=0)) 
dt_low.news_consumption$central <- append(low.news_consumption$central_trust, low.news_consumption$central_satisfaction)
dt_low.news_consumption$local <- append(low.news_consumption$local_trust, low.news_consumption$local_satisfaction)
dt_low.news_consumption$evaluation <- append(low.news_consumption$evaluation_now, low.news_consumption$evaluation_future)
dt_low.news_consumption$confidence <- append(low.news_consumption$confidence_1, low.news_consumption$confidence_2)
dt_low.news_consumption$efficacy <- append(low.news_consumption$efficacy_1, low.news_consumption$efficacy_2)
dt_low.news_consumption$pride <- append(low.news_consumption$pride_1, low.news_consumption$pride_2)
dt_low.news_consumption$rand <- append(low.news_consumption$rand, low.news_consumption$rand)
dt_low.news_consumption$tr <- append(low.news_consumption$tr, low.news_consumption$tr)

result_dt_low.news_consumption=principal(dt_low.news_consumption[,c(1,2,4,5)], nfactors = 1, residuals = FALSE,rotate="varimax")
result_dt_low.news_consumption
dt_low.news_consumption$pca_score=result_dt_low.news_consumption$score[,1]
dt_low.news_consumption$pca_score


gk_11.3.2.estimate=list()
gk_11.3.2.estimate[[1]] = estimate(rand = dt_low.news_consumption$rand, tr = dt_low.news_consumption$tr, y = dt_low.news_consumption$pca_score)

list_.name=c("PCA Score")

names(gk_11.3.2.estimate)=list_.name

gk_11.3.2.estimate_pca_score = as.data.frame(gk_11.3.2.estimate$`PCA Score`)

gk_11.3.2.estimate_pca_score['name'] = rep("PCA Score", times=4)

gk_11.3.2.estimate_pca_score$Estimate = round(gk_11.3.2.estimate_pca_score$Estimate, 3)
gk_11.3.2.estimate_pca_score$SE = round(gk_11.3.2.estimate_pca_score$SE, 3)
gk_11.3.2.estimate_pca_score$t = round(gk_11.3.2.estimate_pca_score$t, 3)
gk_11.3.2.estimate_pca_score$p = round(gk_11.3.2.estimate_pca_score$p, 3)
write.csv(gk_11.3.2.estimate_pca_score, file = file.path(output_dir, "11_3_pca_on_pro_regime_attitudes_low_news_consumption.csv"), row.names = TRUE)


